library(tidyverse)
library(readxl)
library(timetk)
library(skimr)
library(datawizard)
library(neuralnet)
library(yardstick)Neural Network for forecasting
Package
Import Data
df <- read_excel("Data_PDRB_siap2.xlsx",sheet = 2)
glimpse(df)Rows: 36
Columns: 9
$ date <chr> "2015-03-01", "2015-06-01", "2015-09-01", "2015-12-0…
$ ADHK <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…
df <- mutate(df,date = ymd(date))
glimpse(df)Rows: 36
Columns: 9
$ date <date> 2015-03-01, 2015-06-01, 2015-09-01, 2015-12-01, 201…
$ ADHK <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…
Grafik Time Series
Fungsi pembantu
Code
multi_plot_time_series <- function(data,date,exclude_var=NULL,.interactive=TRUE,n_col=2,n_row=2,.title="Multiple Time Series"){
data %>%
select(-all_of(exclude_var)) %>%
select(all_of(date),where(is.numeric)) %>%
pivot_longer(cols = -all_of(date),
names_to = "variable",
values_to = "value") %>%
group_by(variable) %>%
plot_time_series(.data=,
.date_var = date,
.value = value,
.interactive = .interactive,
.title = .title,
.facet_ncol = n_col,
.facet_nrow = n_row,
.smooth = FALSE)
} Single Time Series
plot_time_series(.data=df,
.date_var = date,
.value = ADHK,
.interactive = TRUE,
.title = "ADHK",
.smooth = FALSE)Multiple Plot
multi_plot_time_series(df,
date = "date",
exclude_var = "ADHK",
.interactive=FALSE,
n_col = 2)Multi Input Multi Output (MIMO)
Fungsi pembantu
Code
prepare_mimo_data <- function(data,
date_col,
input_vars,
output_vars,
lags = 1:12,
horizon = 1,
remove_na = TRUE) {
# Tidyeval the date column
date_col <- rlang::enquo(date_col)
# 1) Order by time index
df_prep <- data %>%
dplyr::arrange(!!date_col)
# 2) Generate lagged inputs via timetk
# Creates columns like: sales_lag1, sales_lag2, ..., price_lag1, ...
df_prep <- df_prep %>%
timetk::tk_augment_lags(
.value = all_of(input_vars),
.lags = lags
)
# 3) Generate future targets via dplyr::lead()
# Creates columns like: sales_h1, sales_h2, ...
df_prep <- df_prep %>%
timetk::tk_augment_leads(
.value = all_of(output_vars),
.lags = -horizon
)
# Build vector of all generated column names
lag_cols <- df_prep %>% select(contains("lag")) %>% names()
lead_cols <- df_prep %>% select(contains("lead")) %>% names()
all_new_cols <- c(sort(lag_cols,decreasing = TRUE), lead_cols)
# 4) Optionally drop rows with NAs in any of the new columns
if (remove_na) {
df_prep <- df_prep %>%
tidyr::drop_na(dplyr::all_of(all_new_cols))
}
# Return the prepared tibble
df_prep <- df_prep %>%
dplyr::select(!!date_col,
dplyr::all_of(all_new_cols)) %>%
dplyr::rename("date_lg0"=!!date_col)
#nm_df_prep <- df_prep %>% select(-!!date_col) %>% names()
#date_nm <- df_prep %>% select(!!date_col) %>% names()
#names(df_prep) <- c(date_nm,sort(nm_df_prep,decreasing = FALSE))
return(df_prep)
}Struktur Data MIMO
prepare_mimo_data(data = df,
date_col = date,
input_vars = c("ADHK"),
output_vars = c("ADHK"),
lags = 0:2,
remove_na = FALSE,
horizon = 1:4)Time Series Nerual Network
Fungsi pembantu
Code
train_mlp_mimo <- function(formula = formula,data=data,hidden=hidden,activation="linear",...){
data0 <- data
data1 <- data %>% drop_na()
data <- standardize(data1,select = is.numeric,robust = TRUE)
data1 <- standardize(data0,select = is.numeric,robust = TRUE,reference=data1)
activation = switch(
activation,
tanh = function(x) tanh(x),
linear = function(x) x,
logistic ="logistic"
)
model <- neuralnet(formula = formula,
data = data,hidden = hidden, act.fct = activation,...)
model$data0 <- data0
model$data1 <- data1
return(model)
}Code
forecast_mlp <- function(model,real_out_cols){
df_dt <- model$data0 %>%
select(where(is.Date)) %>%
pull()
date_smry <- tk_get_timeseries_summary(df_dt)
future_date <- tk_make_timeseries(df_dt %>% tail(1),
length_out = 1+ncol(model$response),
by = date_smry$scale)
future_date <- future_date[-1]
forecast <- predict(model,newdata = model$data1 %>% slice_tail(n=1)) %>%
as.data.frame()
names(forecast) <- colnames(model$response)
forecast <- unstandardize(forecast,robust = TRUE,
reference = drop_na(model$data0)
) %>%
as.numeric()
past_data <- model$data0 %>%
select(where(is.Date),all_of(real_out_cols)) %>%
mutate(type="actual") %>%
magrittr::set_names(c("date","value","type"))
forecast <- data.frame(date=future_date,value=forecast,type="forecast")
forecast <- bind_rows(past_data,forecast)
return(forecast)
}Code
forecast_mlp_plot <- function(result,test_data){
result %>%
bind_rows(test_data %>%
mutate(type="actual") %>%
magrittr::set_names(c("date","value","type"))
) %>%
plot_time_series(.date_var = date,
.value = value,
.interactive = TRUE,
.title = "Forecast Plot",
.color_var = type,
.legend_show = FALSE,
.smooth = FALSE)
}Single Input Single Output
Pembagian data
train_df1 <- df %>%
filter(date<="2023-09-01")
train_df1$date [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
[6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
train_df01 <- df %>%
filter(date<="2023-09-01")
train_df01$date [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
[6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
test_df1 <- df %>%
select(date,ADHK) %>%
filter(date>"2023-09-01")
test_df1$date[1] "2023-12-01"
Reshaping MIMO Data
train_df1 <- prepare_mimo_data(data = train_df1,
date_col = date,
input_vars = c("ADHK"),
output_vars = c("ADHK"),
lags = 0:4,
remove_na = FALSE,
horizon = 1)
train_df1train_df01 <- prepare_mimo_data(data = train_df01,
date_col = date,
input_vars = c("ADHK"),
output_vars = c("ADHK"),
lags = 0:4,
remove_na = FALSE,
horizon = 1)
train_df01test_df1Modeling
set.seed(2045)
mod1 <- train_mlp_mimo(ADHK_lead1 ~ ADHK_lag0,data=train_df1,
hidden = c(5),
activation = "linear")
plot(mod1)set.seed(2045)
mod01 <- train_mlp_mimo(ADHK_lead1 ~ ADHK_lag0 + ADHK_lag1 + ADHK_lag2 + ADHK_lag3 + ADHK_lag4,data=train_df01,
hidden = c(5),
activation = "linear")res1 <- forecast_mlp(model = mod1,real_out_cols="ADHK_lag0")
res1 %>%
filter(type=="forecast")res01 <- forecast_mlp(model = mod01,real_out_cols="ADHK_lag0")
res01 %>%
filter(type=="forecast")test_df1rmse_vec(truth = test_df1$ADHK,
estimate = filter(res1,type=="forecast") %>% pull(value))[1] 1362.644
mape_vec(truth = test_df1$ADHK,
estimate = filter(res1,type=="forecast") %>% pull(value))[1] 5.072655
rmse_vec(truth = test_df1$ADHK,
estimate = filter(res01,type=="forecast") %>% pull(value))[1] 561.7437
mape_vec(truth = test_df1$ADHK,
estimate = filter(res01,type=="forecast") %>% pull(value))[1] 2.091179
forecast_mlp_plot(result = res01,test_data = test_df1)Single Input Multi Output
Pembagian data
train_df2 <- df %>%
filter(date<="2023-06-01")
train_df2$date [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
[6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df2 <- df %>%
select(date,ADHK) %>%
filter(date>"2023-06-01")
test_df2$date[1] "2023-09-01" "2023-12-01"
Reshaping MIMO Data
train_df2 <- prepare_mimo_data(data = train_df2,
date_col = date,
input_vars = c("ADHK"),
output_vars = c("ADHK"),
lags = 0,
remove_na = FALSE,
horizon = 1:2)
train_df2test_df2Modeling
set.seed(2045)
mod2 <- train_mlp_mimo(ADHK_lead1 + ADHK_lead2 ~ ADHK_lag0,data=train_df2,
hidden = c(5),
activation = "linear")
plot(mod2)res2 <- forecast_mlp(model = mod2,real_out_cols="ADHK_lag0")
res2 %>%
filter(type=="forecast")test_df2rmse_vec(truth = test_df2$ADHK,
estimate = filter(res2,type=="forecast") %>% pull(value))[1] 2476.009
mape_vec(truth = test_df2$ADHK,
estimate = filter(res2,type=="forecast") %>% pull(value))[1] 9.128468
forecast_mlp_plot(result = res2,test_data = test_df2)Multi Input Multi Output
Pembagian data
train_df3 <- df %>%
filter(date<="2023-06-01")
train_df3$date [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
[6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df3 <- df %>%
select(date,ADHK) %>%
filter(date>"2023-06-01")
test_df3$date[1] "2023-09-01" "2023-12-01"
Reshaping MIMO Data
train_df3 <- prepare_mimo_data(data = train_df3,
date_col = date,
input_vars = c("ADHK"),
output_vars = c("ADHK"),
lags = 0:1,
remove_na = FALSE,
horizon = 1:2)
train_df3test_df3Modeling
set.seed(2045)
mod3 <- train_mlp_mimo(ADHK_lead1 + ADHK_lead2 ~ ADHK_lag0 + ADHK_lag1,
data=train_df3,
hidden = c(10),
activation = "linear")
plot(mod3)res3 <- forecast_mlp(model = mod3,real_out_cols="ADHK_lag0")
res3 %>%
filter(type=="forecast")test_df3rmse_vec(truth = test_df3$ADHK,
estimate = filter(res3,type=="forecast") %>% pull(value))[1] 2331.135
mape_vec(truth = test_df3$ADHK,
estimate = filter(res3,type=="forecast") %>% pull(value))[1] 8.729147
forecast_mlp_plot(result = res3,test_data = test_df3)